1   package org.apache.lucene.search;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import org.apache.lucene.document.Field;
21  import org.apache.lucene.store.Directory;
22  import org.apache.lucene.util.LuceneTestCase;
23  import org.apache.lucene.analysis.MockAnalyzer;
24  import org.apache.lucene.document.Document;
25  import org.apache.lucene.index.DirectoryReader;
26  import org.apache.lucene.index.IndexReader;
27  import org.apache.lucene.index.MultiFields;
28  import org.apache.lucene.index.RandomIndexWriter;
29  import org.apache.lucene.index.Term;
30  import org.apache.lucene.index.Terms;
31  
32  import java.io.IOException;
33  
34  /**
35   * TestWildcard tests the '*' and '?' wildcard characters.
36   */
37  public class TestWildcard extends LuceneTestCase {
38    
39    public void testEquals() {
40      WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
41      WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a"));
42      WildcardQuery wq3 = new WildcardQuery(new Term("field", "b*a"));
43  
44      // reflexive?
45      assertEquals(wq1, wq2);
46      assertEquals(wq2, wq1);
47  
48      // transitive?
49      assertEquals(wq2, wq3);
50      assertEquals(wq1, wq3);
51  
52      assertFalse(wq1.equals(null));
53  
54      FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a"));
55      assertFalse(wq1.equals(fq));
56      assertFalse(fq.equals(wq1));
57    }
58    
59    /**
60     * Tests if a WildcardQuery that has no wildcard in the term is rewritten to a single
61     * TermQuery. The boost should be preserved, and the rewrite should return
62     * a ConstantScoreQuery if the WildcardQuery had a ConstantScore rewriteMethod.
63     */
64    public void testTermWithoutWildcard() throws IOException {
65        Directory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"});
66        IndexReader reader = DirectoryReader.open(indexStore);
67        IndexSearcher searcher = newSearcher(reader);
68  
69        MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard"));
70        assertMatches(searcher, wq, 1);
71  
72        wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
73        Query q = searcher.rewrite(wq);
74        assertTrue(q instanceof TermQuery);
75        
76        wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
77        q = searcher.rewrite(wq);
78        assertTrue(q instanceof MultiTermQueryConstantScoreWrapper);
79        
80        wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
81        q = searcher.rewrite(wq);
82        assertTrue(q instanceof ConstantScoreQuery);
83        reader.close();
84        indexStore.close();
85    }
86    
87    /**
88     * Tests if a WildcardQuery with an empty term is rewritten to an empty BooleanQuery
89     */
90    public void testEmptyTerm() throws IOException {
91      Directory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"});
92      IndexReader reader = DirectoryReader.open(indexStore);
93      IndexSearcher searcher = newSearcher(reader);
94  
95      MultiTermQuery wq = new WildcardQuery(new Term("field", ""));
96      wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
97      assertMatches(searcher, wq, 0);
98      Query q = searcher.rewrite(wq);
99      assertTrue(q instanceof BooleanQuery);
100     assertEquals(0, ((BooleanQuery) q).clauses().size());
101     reader.close();
102     indexStore.close();
103   }
104   
105   /**
106    * Tests if a WildcardQuery that has only a trailing * in the term is
107    * rewritten to a single PrefixQuery. The boost and rewriteMethod should be
108    * preserved.
109    */
110   public void testPrefixTerm() throws IOException {
111     Directory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"});
112     IndexReader reader = DirectoryReader.open(indexStore);
113     IndexSearcher searcher = newSearcher(reader);
114 
115     MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
116     assertMatches(searcher, wq, 2);
117     
118     wq = new WildcardQuery(new Term("field", "*"));
119     assertMatches(searcher, wq, 2);
120     Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
121     assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
122     reader.close();
123     indexStore.close();
124   }
125 
126   /**
127    * Tests Wildcard queries with an asterisk.
128    */
129   public void testAsterisk()
130       throws IOException {
131     Directory indexStore = getIndexStore("body", new String[]
132     {"metal", "metals"});
133     IndexReader reader = DirectoryReader.open(indexStore);
134     IndexSearcher searcher = newSearcher(reader);
135     Query query1 = new TermQuery(new Term("body", "metal"));
136     Query query2 = new WildcardQuery(new Term("body", "metal*"));
137     Query query3 = new WildcardQuery(new Term("body", "m*tal"));
138     Query query4 = new WildcardQuery(new Term("body", "m*tal*"));
139     Query query5 = new WildcardQuery(new Term("body", "m*tals"));
140 
141     BooleanQuery.Builder query6 = new BooleanQuery.Builder();
142     query6.add(query5, BooleanClause.Occur.SHOULD);
143 
144     BooleanQuery.Builder query7 = new BooleanQuery.Builder();
145     query7.add(query3, BooleanClause.Occur.SHOULD);
146     query7.add(query5, BooleanClause.Occur.SHOULD);
147 
148     // Queries do not automatically lower-case search terms:
149     Query query8 = new WildcardQuery(new Term("body", "M*tal*"));
150 
151     assertMatches(searcher, query1, 1);
152     assertMatches(searcher, query2, 2);
153     assertMatches(searcher, query3, 1);
154     assertMatches(searcher, query4, 2);
155     assertMatches(searcher, query5, 1);
156     assertMatches(searcher, query6.build(), 1);
157     assertMatches(searcher, query7.build(), 2);
158     assertMatches(searcher, query8, 0);
159     assertMatches(searcher, new WildcardQuery(new Term("body", "*tall")), 0);
160     assertMatches(searcher, new WildcardQuery(new Term("body", "*tal")), 1);
161     assertMatches(searcher, new WildcardQuery(new Term("body", "*tal*")), 2);
162     reader.close();
163     indexStore.close();
164   }
165 
166   /**
167    * Tests Wildcard queries with a question mark.
168    *
169    * @throws IOException if an error occurs
170    */
171   public void testQuestionmark()
172       throws IOException {
173     Directory indexStore = getIndexStore("body", new String[]
174     {"metal", "metals", "mXtals", "mXtXls"});
175     IndexReader reader = DirectoryReader.open(indexStore);
176     IndexSearcher searcher = newSearcher(reader);
177     Query query1 = new WildcardQuery(new Term("body", "m?tal"));
178     Query query2 = new WildcardQuery(new Term("body", "metal?"));
179     Query query3 = new WildcardQuery(new Term("body", "metals?"));
180     Query query4 = new WildcardQuery(new Term("body", "m?t?ls"));
181     Query query5 = new WildcardQuery(new Term("body", "M?t?ls"));
182     Query query6 = new WildcardQuery(new Term("body", "meta??"));
183     
184     assertMatches(searcher, query1, 1); 
185     assertMatches(searcher, query2, 1);
186     assertMatches(searcher, query3, 0);
187     assertMatches(searcher, query4, 3);
188     assertMatches(searcher, query5, 0);
189     assertMatches(searcher, query6, 1); // Query: 'meta??' matches 'metals' not 'metal'
190     reader.close();
191     indexStore.close();
192   }
193 
194   /**
195    * Tests if wildcard escaping works
196    */
197   public void testEscapes() throws Exception {
198     Directory indexStore = getIndexStore("field", 
199         new String[]{"foo*bar", "foo??bar", "fooCDbar", "fooSOMETHINGbar", "foo\\"});
200     IndexReader reader = DirectoryReader.open(indexStore);
201     IndexSearcher searcher = newSearcher(reader);
202 
203     // without escape: matches foo??bar, fooCDbar, foo*bar, and fooSOMETHINGbar
204     WildcardQuery unescaped = new WildcardQuery(new Term("field", "foo*bar"));
205     assertMatches(searcher, unescaped, 4);
206     
207     // with escape: only matches foo*bar
208     WildcardQuery escaped = new WildcardQuery(new Term("field", "foo\\*bar"));
209     assertMatches(searcher, escaped, 1);
210     
211     // without escape: matches foo??bar and fooCDbar
212     unescaped = new WildcardQuery(new Term("field", "foo??bar"));
213     assertMatches(searcher, unescaped, 2);
214     
215     // with escape: matches foo??bar only
216     escaped = new WildcardQuery(new Term("field", "foo\\?\\?bar"));
217     assertMatches(searcher, escaped, 1);
218     
219     // check escaping at end: lenient parse yields "foo\"
220     WildcardQuery atEnd = new WildcardQuery(new Term("field", "foo\\"));
221     assertMatches(searcher, atEnd, 1);
222     
223     reader.close();
224     indexStore.close();
225   }
226   
227   private Directory getIndexStore(String field, String[] contents)
228       throws IOException {
229     Directory indexStore = newDirectory();
230     RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
231     for (int i = 0; i < contents.length; ++i) {
232       Document doc = new Document();
233       doc.add(newTextField(field, contents[i], Field.Store.YES));
234       writer.addDocument(doc);
235     }
236     writer.close();
237 
238     return indexStore;
239   }
240 
241   private void assertMatches(IndexSearcher searcher, Query q, int expectedMatches)
242       throws IOException {
243     ScoreDoc[] result = searcher.search(q, 1000).scoreDocs;
244     assertEquals(expectedMatches, result.length);
245   }
246 
247   /**
248    * Test that wild card queries are parsed to the correct type and are searched correctly.
249    * This test looks at both parsing and execution of wildcard queries.
250    * Although placed here, it also tests prefix queries, verifying that
251    * prefix queries are not parsed into wild card queries, and vice-versa.
252    */
253   public void testParsingAndSearching() throws Exception {
254     String field = "content";
255     String docs[] = {
256         "\\ abcdefg1",
257         "\\79 hijklmn1",
258         "\\\\ opqrstu1",
259     };
260 
261     // queries that should find all docs
262     Query matchAll[] = {
263         new WildcardQuery(new Term(field, "*")),
264         new WildcardQuery(new Term(field, "*1")),
265         new WildcardQuery(new Term(field, "**1")),
266         new WildcardQuery(new Term(field, "*?")),
267         new WildcardQuery(new Term(field, "*?1")),
268         new WildcardQuery(new Term(field, "?*1")),
269         new WildcardQuery(new Term(field, "**")),
270         new WildcardQuery(new Term(field, "***")),
271         new WildcardQuery(new Term(field, "\\\\*"))
272     };
273 
274     // queries that should find no docs
275     Query matchNone[] = {
276         new WildcardQuery(new Term(field, "a*h")),
277         new WildcardQuery(new Term(field, "a?h")),
278         new WildcardQuery(new Term(field, "*a*h")),
279         new WildcardQuery(new Term(field, "?a")),
280         new WildcardQuery(new Term(field, "a?"))
281     };
282 
283     PrefixQuery matchOneDocPrefix[][] = {
284         {new PrefixQuery(new Term(field, "a")),
285          new PrefixQuery(new Term(field, "ab")),
286          new PrefixQuery(new Term(field, "abc"))}, // these should find only doc 0
287 
288         {new PrefixQuery(new Term(field, "h")),
289          new PrefixQuery(new Term(field, "hi")),
290          new PrefixQuery(new Term(field, "hij")),
291          new PrefixQuery(new Term(field, "\\7"))}, // these should find only doc 1
292 
293         {new PrefixQuery(new Term(field, "o")),
294          new PrefixQuery(new Term(field, "op")),
295          new PrefixQuery(new Term(field, "opq")),
296          new PrefixQuery(new Term(field, "\\\\"))}, // these should find only doc 2
297     };
298 
299     WildcardQuery matchOneDocWild[][] = {
300 
301         {new WildcardQuery(new Term(field, "*a*")), // these should find only doc 0
302             new WildcardQuery(new Term(field, "*ab*")),
303             new WildcardQuery(new Term(field, "*abc**")),
304             new WildcardQuery(new Term(field, "ab*e*")),
305             new WildcardQuery(new Term(field, "*g?")),
306             new WildcardQuery(new Term(field, "*f?1"))},
307 
308         {new WildcardQuery(new Term(field, "*h*")), // these should find only doc 1
309             new WildcardQuery(new Term(field, "*hi*")),
310             new WildcardQuery(new Term(field, "*hij**")),
311             new WildcardQuery(new Term(field, "hi*k*")),
312             new WildcardQuery(new Term(field, "*n?")),
313             new WildcardQuery(new Term(field, "*m?1")),
314             new WildcardQuery(new Term(field, "hij**"))},
315 
316         {new WildcardQuery(new Term(field, "*o*")), // these should find only doc 2
317             new WildcardQuery(new Term(field, "*op*")),
318             new WildcardQuery(new Term(field, "*opq**")),
319             new WildcardQuery(new Term(field, "op*q*")),
320             new WildcardQuery(new Term(field, "*u?")),
321             new WildcardQuery(new Term(field, "*t?1")),
322             new WildcardQuery(new Term(field, "opq**"))}
323     };
324 
325     // prepare the index
326     Directory dir = newDirectory();
327     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, 
328         newIndexWriterConfig(new MockAnalyzer(random()))
329         .setMergePolicy(newLogMergePolicy()));
330     for (int i = 0; i < docs.length; i++) {
331       Document doc = new Document();
332       doc.add(newTextField(field, docs[i], Field.Store.NO));
333       iw.addDocument(doc);
334     }
335     iw.close();
336     
337     IndexReader reader = DirectoryReader.open(dir);
338     IndexSearcher searcher = newSearcher(reader);
339     
340     // test queries that must find all
341     for (Query q : matchAll) {
342       if (VERBOSE) System.out.println("matchAll: q=" + q + " " + q.getClass().getName());
343       ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
344       assertEquals(docs.length, hits.length);
345     }
346     
347     // test queries that must find none
348     for (Query q : matchNone) {
349       if (VERBOSE) System.out.println("matchNone: q=" + q + " " + q.getClass().getName());
350       ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
351       assertEquals(0, hits.length);
352     }
353 
354     // thest the prefi queries find only one doc
355     for (int i = 0; i < matchOneDocPrefix.length; i++) {
356       for (int j = 0; j < matchOneDocPrefix[i].length; j++) {
357         Query q = matchOneDocPrefix[i][j];
358         if (VERBOSE) System.out.println("match 1 prefix: doc="+docs[i]+" q="+q+" "+q.getClass().getName());
359         ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
360         assertEquals(1,hits.length);
361         assertEquals(i,hits[0].doc);
362       }
363     }
364 
365     // test the wildcard queries find only one doc
366     for (int i = 0; i < matchOneDocWild.length; i++) {
367       for (int j = 0; j < matchOneDocWild[i].length; j++) {
368         Query q = matchOneDocWild[i][j];
369         if (VERBOSE) System.out.println("match 1 wild: doc="+docs[i]+" q="+q+" "+q.getClass().getName());
370         ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
371         assertEquals(1,hits.length);
372         assertEquals(i,hits[0].doc);
373       }
374     }
375 
376     reader.close();
377     dir.close();
378   }
379 }